Martinig, A. R., S. L. P. Burk, Y. Yang, M. Lagisz, and S. Nakagawa
Published
August 23, 2024
Things to do for April
Rerun all the models
Put figures at the end
Add some text so the reader can understand
Go through code and add more annotations
Ask questions about things you do not understand
1 Setting up
1.1 Load packages
Code
# Load (installing first if necessary) all packages used in this analysis.
# NOTE: MuMIn was listed twice in the original call; the duplicate is removed.
pacman::p_load(
  tidyverse,   # tidy family and related packages
  kableExtra,  # nice tables
  MuMIn,       # multi-model inference
  emmeans,     # post-hoc tests
  gridExtra,   # may not use this
  pander,      # nice tables
  metafor,     # package for meta-analysis
  ape,         # phylogenetic analysis
  patchwork,   # putting ggplots together - install via devtools
  here,        # making reading files easy
  orchaRd      # plotting orchard plots
  # more to add
)
# Register metafor's MuMIn hooks so model selection (dredge/sw) works
# with rma objects.
eval(metafor:::.MuMIn)
1.3 Calculate effect sizes and sampling variances using custom functions
Code
# function to calculate effect sizes# Zr - correlation# there is always neffect_size <-function(m1, m2, sd1, sd2, n1, n2, n, # 12 arguments est , se, p_val, direction, method){if(method =="mean_method"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 s_pool <-sqrt( ((n1-1)*sd1^2+ (n2-1)*sd2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="count_method"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 s1 <-sqrt(m1) s2 <-sqrt(m2) s_pool <-sqrt( ((n1-1)*s1^2+ (n2-1)*s2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="percent_method1"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 m1 <- m1/100 m2 <- m2/100 s1 <-1/sqrt(8) s2 <-1/sqrt(8) m1 <-asin(sqrt(m1/100)) m2 <-asin(sqrt(m2/100)) s_pool <-sqrt( ((n1-1)*s1^2+ (n2-1)*s2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="percent_method2"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 m1 <- m1/100 m2 <- m2/100 sd1 <- sd1/100 sd2 <- sd2/100 s1 <-1/sqrt(sd1^2/(4*m1*(1-m1))) s2 <-1/sqrt(sd2^2/(4*m2*(1-m2))) m1 <-asin(sqrt(m1/100)) m2 <-asin(sqrt(m2/100)) s_pool <-sqrt( ((n1-1)*s1^2+ (n2-1)*s2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="proportion_method1"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 s1 <-1/sqrt(8) s2 <-1/sqrt(8) m1 <-asin(sqrt(m1)) m2 <-asin(sqrt(m2)) s_pool <-sqrt( ((n1-1)*s1^2+ (n2-1)*s2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method 
=="proportion_method2"){ h <- n/n1 + n/n2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 s1 <-1/sqrt(sd1^2/(4*m1*(1-m1))) s2 <-1/sqrt(sd2^2/(4*m2*(1-m2))) m1 <-asin(sqrt(m1/100)) m2 <-asin(sqrt(m2/100)) s_pool <-sqrt( ((n1-1)*s1^2+ (n2-1)*s2^2) / (n -2) ) j <-1- (3/ (4*n -9)) d <- ((m2 - m1) / s_pool) * j r_pb <- d /sqrt(d^2+ h) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="t_method1"){ p <- n1/n # prop for n1 q <- n2/n # prop for n2 r_pb <- est/sqrt(est^2+ n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="t_method2"){ n1 <- n/2 n2 <- n/2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 r_pb <- est/sqrt(est^2+ n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="F_method1"){ p <- n1/n # prop for n1 q <- n2/n # prop for n2 r_pb <-sqrt(est)/sqrt(est + n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r_b = r_b*(direction) r <- r_b }elseif(method =="F_method2"){ n1 <- n/2 n2 <- n/2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 r_pb <-sqrt(est)/sqrt(est + n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r_b = r_b*(direction) r <- r_b }elseif(method =="p_method1"){ p <- n1/n # prop for n1 q <- n2/n # prop for n2 t <-qt(1- p_val, n -2) r_pb <- t/sqrt(t^2+ n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r_b <- r_b*(direction) r <- r_b }elseif(method =="p_method2"){ n1 <- n/2 n2 <- n/2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 t <-qt(1- p_val, n -2) r_pb <- t/sqrt(t^2+ n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r_b <- r_b*(direction) r <- r_b }elseif(method =="correlation_method1"){ r <- est }elseif(method =="correlation_method2"){ r <-2*sin((pi/6)*est) }elseif(method =="estimate_method1"){ p <- n1/n # prop for n1 q <- n2/n # prop for n2 t <- est/se r_pb <- t/sqrt(t^2+ n -2) r_b <- r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r }elseif(method =="estimate_method2"){ n1 <- n/2 n2 <- n/2 p <- n1/n # prop for n1 q <- n2/n # prop for n2 t <- est/se r_pb <- t/sqrt(t^2+ n -2) r_b <- 
r_pb*(sqrt(p*q)/dnorm(qnorm(p))) r <- r_b #r_b = r } if(r >=1){# if over 1, we use 0.99 Zr <-atanh(0.99) }elseif(r <=-1){ Zr <-atanh(-0.99) # r = correlation } else { Zr <-atanh(r) # r = correlation } VZr <-1/(n -3)data.frame(ri = r, yi = Zr , vi = VZr)}###########' Title: Contrast name generator#'#' @param name: a vector of character stringscont_gen <-function(name) { combination <-combn(name, 2) name_dat <-t(combination) names <-paste(name_dat[, 1], name_dat[, 2], sep ="-")return(names)}#' @title get_pred1: intercept-less model#' @description Function to get CIs (confidence intervals) and PIs (prediction intervals) from rma objects (metafor)#' @param model: rma.mv object #' @param mod: the name of a moderator get_pred1 <-function (model, mod =" ") { name <-firstup(as.character(stringr::str_replace(row.names(model$beta), mod, ""))) len <-length(name)if (len !=1) { newdata <-diag(len) pred <- metafor::predict.rma(model, newmods = newdata,tau2.levels =1:len) }else { pred <- metafor::predict.rma(model) } estimate <- pred$pred lowerCL <- pred$ci.lb upperCL <- pred$ci.ub lowerPR <- pred$cr.lb upperPR <- pred$cr.ub table <-tibble(name =factor(name, levels = name, labels = name), estimate = estimate,lowerCL = lowerCL, upperCL = upperCL,pval = model$pval,lowerPR = lowerPR, upperPR = upperPR)}#' @title get_pred2: normal model#' @description Function to get CIs (confidence intervals) and PIs (prediction intervals) from rma objects (metafor)#' @param model: rma.mv object #' @param mod: the name of a moderator get_pred2 <-function (model, mod =" ") { name <-as.factor(str_replace(row.names(model$beta), paste0("relevel", "\\(", mod,", ref = name","\\)"),"")) len <-length(name)if(len !=1){ newdata <-diag(len) pred <-predict.rma(model, intercept =FALSE, newmods = newdata[ ,-1]) }else { pred <-predict.rma(model) } estimate <- pred$pred lowerCL <- pred$ci.lb upperCL <- pred$ci.ub lowerPR <- pred$cr.lb upperPR <- pred$cr.ub table <-tibble(name =factor(name, levels = name, labels = 
name), estimate = estimate,lowerCL = lowerCL, upperCL = upperCL,pval = model$pval,lowerPR = lowerPR, upperPR = upperPR)}#' @title mr_results#' @description Function to put results of meta-regression and its contrasts#' @param res1: data frame 1#' @param res1: data frame 2mr_results <-function(res1, res2) { restuls <-tibble(`Fixed effect`=c(as.character(res1$name), cont_gen(res1$name)),Estimate =c(res1$estimate, res2$estimate),`Lower CI [0.025]`=c(res1$lowerCL, res2$lowerCL),`Upper CI [0.975]`=c(res1$upperCL, res2$upperCL),`P value`=c(res1$pval, res2$pval),`Lower PI [0.025]`=c(res1$lowerPR, res2$lowerPR),`Upper PI [0.975]`=c(res1$upperPR, res2$upperPR), )}#' @title all_models#' @description Function to take all possible models and get their results#' @param model: intercept-less model#' @param mod: the name of a moderator all_models <-function(model, mod =" ", type ="normal") {# getting the level names out level_names <-levels(factor(model$data[[mod]])) dat2 <- model$data mod <- mod# creating a function to run models run_rma1 <-function(name) {# variance covarinace matrix for sampling errors VCV1 <-vcalc(vi = dat$vi, cluster = dat$shared_group, rho =0.5) VCV1 <-nearPD(VCV1)$matrma.mv(yi, V = VCV1,mods =~relevel(dat2[[mod]], ref = name),random =list(~1| effectID,~1| paperID,~1| species_cleaned),data = dat2,test ="t",sparse =TRUE,control =list(optimizer ="Nelder-Mead")) } run_rma2 <-function(name) {# variance covarinace matrix for sampling errors VCVa <-vcalc(vi = dat2$vi_abs, cluster = dat$shared_group, rho =0.5) VCVa <-nearPD(VCVa)$matrma.mv(abs_yi2, V = VCVa,mods =~relevel(dat2[[mod]], ref = name),random =list(~1| effectID,~1| paperID,~1| species_cleaned),data = dat2,test ="t",sparse =TRUE,control =list(optimizer ="Nelder-Mead")) } run_rma3 <-function(name) {# variance covarinace matrix for sampling errors VCV1 <-vcalc(vi = dat$vi, cluster = dat$shared_group, rho =0.5) VCV1 <-nearPD(VCV1)$matrma.mv(yi, V = VCV1,mods =~relevel(dat2[[mod]], ref = name),random =list( 
# hetero in relation to modformula(paste("~", mod, "| effectID")),~1| paperID,~1| species_cleaned),struct ="DIAG",data = dat2,test ="t",sparse =TRUE,control =list(optimizer ="Nelder-Mead")) }# results of meta-regression including all contrast results; taking the last level out ([-length(level_names)])# use different specifications for aboslute and hetero models if (type =="normal"){ model_all <- purrr::map(level_names[-length(level_names)], run_rma1) }elseif(type =="absolute"){ model_all <- purrr::map(level_names[-length(level_names)], run_rma2) }elseif(type =="hetero"){ model_all <- purrr::map(level_names[-length(level_names)], run_rma3) }# getting estimates from intercept-less models (means for all the groups) res1 <-get_pred1(model, mod = mod)# getting estiamtes from all contrast models res2_pre <- purrr::map(model_all, ~get_pred2(.x, mod = mod))# a list of the numbers to take out unnecessary contrasts contra_list <-Map(seq, from=1, to=1:(length(level_names) -1)) res2 <- purrr::map2_dfr(res2_pre, contra_list, ~.x[-(.y), ]) # creating a table res_tab <-mr_results(res1, res2) %>%kable("html", digits =3) %>%kable_styling("striped", position ="left") %>%scroll_box(width ="100%")# results res_tab}
1.4 Preparing final dataset
Code
# Compute an effect size for every row and bind the results onto the dataset.
effect2 <- pmap_dfr(
  list(dat$mean_group_1, dat$mean_group_2,
       dat$variance_group_1, dat$variance_group_2,
       dat$n_group_1, dat$n_group_2, dat$n,
       dat$effect_size_value, dat$effect_size_variance,
       dat$effect_size_p_value_numeric, dat$direction_change,
       factor(dat$function_needed)),
  effect_size
)
dat <- cbind(dat, effect2)

# Rename the imported "X" column to effectID.
colnames(dat)[colnames(dat) == "X"] <- "effectID"

# Phylogeny labels use underscores (to match tree tip labels).
dat$phylogeny <- gsub(" ", "_", dat$species_cleaned)

# Diagnostics for checking species-name matches between tree and dataset
# (kept for reference):
# match(unique(dat$phylogeny), tree$tip.label)
# match(tree$tip.label, unique(dat$phylogeny))
# intersect(unique(dat$phylogeny), tree$tip.label)
# setdiff(unique(dat$phylogeny), tree$tip.label)
# sum(is.na(match(unique(dat$phylogeny), tree$tip.label)))

# Correlation matrix derived from the phylogenetic tree.
tree <- compute.brlen(tree)
cor_tree <- vcv(tree, corr = TRUE)

# VCV matrix for sampling error variances: effect sizes in the same shared
# group are assumed correlated at rho = 0.5. The result is forced to be
# positive definite (this should be described in the methods).
VCV <- vcalc(vi = dat$vi, cluster = dat$shared_group, rho = 0.5)
VCV <- nearPD(VCV)$mat
# Intercept-only meta-analytic model. Phylogeny is not included as a random
# effect because it accounted for nothing (0%) of the heterogeneity.
mod1 <- rma.mv(
  yi = yi, V = VCV, mod = ~1, data = dat,
  random = list(~1 | effectID,
                ~1 | paperID,
                ~1 | species_cleaned),
  test = "t", sparse = TRUE
)
summary(mod1)
# NOTE(review): "I want to move this up" — consider relocating this section.
# Uni-moderator meta-regression (one per moderator); intercept-less
# parameterisation so each species class gets its own mean.
mod_class <- rma.mv(
  yi = yi, V = VCV, mod = ~ species_class - 1, data = dat,
  random = list(~1 | effectID,
                ~1 | paperID,
                ~1 | species_cleaned),
  test = "t", sparse = TRUE
)
summary(mod_class)
# Orchard plot for the confirmation-bias moderator model.
orchard_plot(
  mod_confirm,
  mod = "confirmation_bias",
  xlab = "Effect Size: Zr",
  group = "paperID",
  branch.size = 4,
  angle = 90
)
Code
# Full contrast table for the confirmation-bias moderator.
tab_confirm <- all_models(mod_confirm, mod = "confirmation_bias", type = "normal")
# Save tab_confirm as RDS for reuse.
saveRDS(tab_confirm, here("Rdata", "tab_confirm.RDS"))
# An interaction between two categorical variables is equivalent to a single
# factor whose levels are the combined categories, so we build that variable.
dat$sex_species_class <- paste(dat$sex, dat$species_class, sep = "_")
mod_int1 <- rma.mv(
  yi = yi, V = VCV, mod = ~ sex_species_class - 1, data = dat,
  random = list(~1 | effectID,
                ~1 | paperID,
                ~1 | species_cleaned),
  test = "t", sparse = TRUE
)
summary(mod_int1)
# use "ML" for model selection######################## Mulit-variable models######################## we need - tomod_full <-rma.mv(yi = yi, V = VCV,mod =~1+ sex + age_class_clean + whose_fitness + fitness_higher_level + dispersal_type + dispersal_phase,data = dat, random =list(~1| effectID,~1| paperID,~1| species_cleaned),method ="ML",test ="t",sparse =TRUE)summary(mod_full)
# Relative importance of each predictor: sum of Akaike weights across all
# candidate models. `candidates` is produced by the model-selection step
# (e.g. MuMIn::dredge) elsewhere in the analysis.
importance <- sw(candidates)
importance
whose_fitness dispersal_phase age_class_clean
Sum of weights: 0.42 0.37 0.33
N containing models: 32 32 32
fitness_higher_level sex dispersal_type
Sum of weights: 0.24 0.23 0.15
N containing models: 32 32 32